rm(list = ls(all = TRUE))
#############################################################################################################
#############################################################################################################
# Estimates the Hedonic regressions and then does the leave-one-out Jacknife to estimate sampling variation #
#############################################################################################################
#############################################################################################################
library(foreign)
library(MASS)
library(Matrix)
library(Rsolnp)
library(lubridate)
make.data<-TRUE
source("weighted.squared.loss.R")
source("squared.loss.R")
source("add.up.R")
#source("min.dist.R")

# Competition Commission data
Mkt.Size <- rbind(1988:1998,c(16.1,17.7,19,17,16.5,17.7,19.2,21.0,20.9,25.2,28.4)*10^9)

load("ONScars.RData")
which(data[,"submodel"]==82 & data[,"pldm"]==6)->t    

data[t,"cd.player"]<-1
data[,"fuel.consumption.at.56.mph"] <- data[,"fuel.consumption.at.56.mph"]/100  # Units: liters/100km
data[,"acceleration"] <- data[,"acceleration"]/10 # 0-60mph in seconds
data[,"length"]<-data[,"length"]*2.5400013716/100 # In metres
data[,"height"]<-data[,"height"]*2.5400013716/100 # In metres
data[,"width"]<-data[,"width"]*2.5400013716/100 # In metres
data[,"pow_seat"]<-1*(data[,"pow_seat"]!=0)

data <- data[which(data[,"year"]>=88),]
#data <- data[which(data[,"year"]<95),]

########################
# Sort chronologically #
########################	
year 		<- data[,"year"]
month 		<- data[,"month"]
quarter 	<- floor((month-1)/3)+1
yrm 		<- year*100 + month
yrq 		<- year*100 + quarter
data		<- data[order(yrm),]  
models      <- sort(unique(data[,"model"]))
submodels   <- sort(unique(data[,"submodel"]))
makes	  	<- data[,42:57]
makes.cat 	<- apply(makes,1,function(x) which(x==1))
makers		<- names(data)[42:57]

country     <- matrix(0,dim(data)[1],8)
colnames(country)<-c("German", "French", "Italian", "US", "UK", "Japanese","Spanish", "Swedish")
country[,"German"]<-makes[,"audi"]+makes[,"bmw"]+makes[,"volkswag"]
country[,"Italian"]<-makes[,"alfa"]+makes[,"fiat"]
country[,"French"]<-makes[,"citroen"]+makes[,"peugeot"]+makes[,"renault"]
country[,"Japanese"]<-makes[,"nissan"]+makes[,"toyota"]
country[,"US"]<-makes[,"ford"]+makes[,"vauxhall"]
country[,"Spanish"]<-makes[,"seat"]
country[,"UK"]<-makes[,"rover"]
country[,"Swedish"]<-makes[,"volvo"]

marque		<- makers[makes.cat]
car         <- paste(marque,data[,"submodel"])     # There are 103 of these
cars		<- sort(unique(car))
data		<- cbind(data,car)
data		<- cbind(data,country)
years 		<- sort(unique(data[,"year"]))
months		<- sort(unique(data[,"month"]))
quarters    <- sort(unique(yrq))
sub.model	<- as.vector(data[,"car"])


chars<-c(13:40,42:56)  # Characteristics and makes
chars<-c(13:40,59:66)  # Characteristics and countries

# chars <- setdiff(chars,which(names(data)=="engine.size.cc"))
# chars <- setdiff(chars,which(names(data)=="length"))
chars <- setdiff(chars,which(names(data)=="width"))
chars <- setdiff(chars,which(names(data)=="height"))
chars <- setdiff(chars,which(names(data)=="pas_airb"))
# chars <- setdiff(chars,which(names(data)=="trip_com"))
chars <- setdiff(chars,which(names(data)=="max.speed"))
# chars <- setdiff(chars,which(names(data)=="head.lamp.cleaners"))
# chars <- setdiff(chars,which(names(data)=="all.electric.windows"))
chars <- setdiff(chars,which(names(data)=="cruise.control"))
# chars <- setdiff(chars,which(names(data)=="acceleration"))

# 1. Broad definition of a product: use unique sub.model as per the ONS data: 36 products
cars 		<- unique(sub.model)

# 2. Very narrow definition of a product: a fixed set of characteristics: 353 products
characteristics <-  as.matrix(data[,chars])
unique.characteristics <- characteristics[which(!duplicated(characteristics)),]
for (i in 1:dim(data)[1]){
	j <- which(apply((matrix(1,dim(unique.characteristics)[1],1)%*%characteristics[i,])==unique.characteristics,1,sum)==31)
	sub.model[i] <- paste("Model",j)
}
cars 		<- unique(sub.model)

# 3. Intermediate definition of goods: a fixed set of goods on the extensive margin but variable wrt intensive: 171 products
characteristics <-  1*(as.matrix(data[,chars])>0)
unique.characteristics <- characteristics[which(!duplicated(characteristics)),]
for (i in 1:dim(data)[1]){
	j <- which(apply((matrix(1,dim(unique.characteristics)[1],1)%*%characteristics[i,])==unique.characteristics,1,sum)==31)
	sub.model[i] <- paste("Model",j)
}
cars 		<- unique(sub.model)


A <- list()
P <- matrix(NA,length(cars),length(quarters))
colnames(P) <- quarters
rownames(P) <- cars
W <- P

##########################
# Step through the yrq's #
##########################	
	
for (t in 1:length(quarters)){
	s <- which(yrq==quarters[t])
	types<-unique(sub.model[s])
	A[[t]]<-matrix(NA,length(cars),length(chars))
	colnames(A[[t]])<-names(data[,chars])
	rownames(A[[t]])<-cars

	for (i in 1:length(cars)){
		if (cars[i] %in% types){
			j<-which(sub.model[s]==cars[i])
			
			if (length(j)>1){
				A[[t]][i,]	<- apply(data[s[j],chars],2,mean)
			} else {
				A[[t]][i,]	<- as.matrix(data[s[j],chars])
			}
			
			P[i,t]<- round(mean(data[s[j],"price"]))
			W[i,t]<- mean(data[s[j],"weight"])
			
		}
	}
}

W <- W/(matrix(1,length(cars),1)%*%apply(W,2,sum,na.rm=T))
X <- matrix(rep(Mkt.Size[2,1:8],4),8,4)
X <- as.vector(t(X))/4
X <- X[1:length(quarters)]
Q <- round((W*(matrix(1,length(cars),1)%*%X))/P)

J <- dim(A[[1]])[2]
K <- dim(A[[1]])[1]
T <- dim(P)[2]

R <- NULL
Z <- matrix(NA,J,T)

rownames(Z) <- colnames(A[[1]])
colnames(Z) <- quarters
Pi.est <- Z
log.P <-NULL
Big.A <-NULL
Time  <- NULL
Var.Pi <- list(list())
Pi.JK  <- list(list())

for (t in 1:T){ 
	
	q.pos <-which(Q[,t]>0)
	A.mat <- A[[t]]
	Z[,t] <- t(A.mat[q.pos,])%*%Q[q.pos,t]	
	z.pos <-which(Z[,t]>0)
	Pi.0<-rep(1,length(z.pos))
	LB <- as.vector(matrix(0,length(z.pos),1))
	UB <- as.vector(matrix(max(P[,t],na.rm=TRUE),length(z.pos),1))
	UB <- rep(Inf,length(z.pos))
	LB <- rep(0,length(z.pos))
    DGP <- solnp(pars=Pi.0, weighted.squared.loss, eqfun = add.up, eqB = 0, ineqfun =  NULL, ineqLB=NULL, ineqUB=NULL, LB, UB)
	Pi.est[z.pos,t] <-DGP$pars

	A.save <- A.mat
	P.save <- P
	Q.save <- Q
	R <- dim(Q)[1]
	Pi <- matrix(NA,J,R)
	
	for (r in 1:R){  # Leave-one-out estimates

		s <- setdiff(1:R,r)
		Q <- Q.save[s,]  # Sample 
		P <- P.save[s,]  # Sample 
		A.mat <- A.save[s,]  # Sample	
		
		q.pos <-which(Q[,t]>0)
		Z[,t] <- t(A.mat[q.pos,])%*%Q[q.pos,t]	
		z.pos <- which(Z[,t]>0)		
		Pi.0  <- rep(1,length(z.pos))
		LB    <- as.vector(matrix(0.0,length(z.pos),1))
		UB    <- as.vector(matrix(10000000,length(z.pos),1))
	    DGP   <- solnp(pars=Pi.0, squared.loss, eqfun = add.up, eqB = 0, ineqfun =  NULL, ineqLB=NULL, ineqUB=NULL, LB, UB)		
		Pi[z.pos,r] <- DGP$pars	
	}
	Var.Pi[[t]]<-cov(t(Pi),use="pairwise.complete.obs")

	A.mat 	<- A.save								# Reinstate the original A matrix
	Q 		<- Q.save
	P		<- P.save
	q.pos 	<- which(Q[,t]>0)
	Z[,t] 	<- t(A.mat[q.pos,])%*%Q[q.pos,t]	
	

	Pi.JK[[t]]<-Pi
	
	################################################
	# Build up the data for the repackaging model #
	################################################
	
	log.P <- c(log.P,log(P[,t]))
	Big.A <- rbind(Big.A,A.mat)
	tmp <- matrix(0,length(P[,t]),T)
	tmp[,t]<-1
	Time <- rbind(Time,tmp)

}

Repack.Index <- matrix(NA,T,T)
Repack.Index[1,1]<-1
Var.Repack <- list(list())
for (t in 2:T){
	s<-which(apply(Time[,1:t],1,sum)>0)
	Repack <- lm(log.P[s] ~ Time[s,] + Big.A[s,] -1)
	Repack.t <- Repack$coefficients[1:t]
	Repack.Index[t,1:t] <- Repack.t
	var <-vcov(Repack)
	Var.Repack[[t]]<-var
}


save(P,Q,Pi.est,Pi.JK,Var.Pi,Z,A,Repack.Index,Var.Repack,file = paste("results",K,".RData",sep=""))
